package net.paoding.analysis.analyzer;

import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector;
import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector;
import net.paoding.analysis.knife.PaodingMaker;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import java.io.IOException;
import java.io.Reader;
import java.util.Map;

/**
 * 实现Solr分词器接口 基于PaodingTokenizer的实现
 * 
 * @author zippoy(mashupeye@163.com)<br>
 *         2014年3月26日 下午12:20:55<br>
 * 	Remark:<br>
 *		
 * 
 */
public final class PaodingTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {

	/**
	 * Most-words segmentation mode (the default): collects every plausible word.
	 */
	public static final String MOST_WORDS_MODE = "most-words";
	/**
	 * Max-word-length segmentation mode: prefers the longest matching word.
	 */
	public static final String MAX_WORD_LENGTH_MODE = "max-word-length";

	/** Current segmentation mode; always one of the *_MODE constants above. */
	private String mode = MOST_WORDS_MODE;

	/**
	 * Sets the segmentation mode.
	 *
	 * @param mode {@code "most-words"} or {@code "default"} (case-insensitive) for
	 *             most-words mode, {@code "max-word-length"} (case-insensitive) for
	 *             max-word-length mode, or {@code "class:<fqcn>"} to select a custom
	 *             collector class (currently not implemented — see
	 *             {@link #setModeClass(String)}).
	 * @throws IllegalArgumentException if {@code mode} is {@code null} or unrecognized
	 */
	public void setMode(String mode) {
		if (mode == null) {
			// Previously a null argument produced a bare NPE from startsWith();
			// fail with the same IllegalArgumentException used for other bad input.
			throw new IllegalArgumentException("不合法的分析器Mode参数设置:" + mode);
		}
		if (mode.startsWith("class:")) {
			setModeClass(mode.substring("class:".length()));
		} else if (MOST_WORDS_MODE.equalsIgnoreCase(mode) || "default".equalsIgnoreCase(mode)) {
			// BUG FIX: the original called setMode(MOST_WORDS_MODE) here, which
			// re-entered this same branch and recursed until StackOverflowError.
			// Assign the field directly instead.
			this.mode = MOST_WORDS_MODE;
		} else if (MAX_WORD_LENGTH_MODE.equalsIgnoreCase(mode)) {
			// Same recursion bug as above in the original; assign directly.
			this.mode = MAX_WORD_LENGTH_MODE;
		} else {
			throw new IllegalArgumentException("不合法的分析器Mode参数设置:" + mode);
		}
	}

	/**
	 * Selects a custom collector class by fully-qualified name.
	 * <p>
	 * NOTE(review): intentionally a no-op — the implementation was commented out in
	 * the original source and the loaded class was never used. Kept for interface
	 * compatibility with the {@code "class:"} prefix accepted by {@link #setMode(String)}.
	 *
	 * @param modeClass fully-qualified class name of a custom {@code TokenCollector}
	 */
	public void setModeClass(String modeClass) {
		/*try {
            this.modeClass = Class.forName(modeClass);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("not found mode class:" + e.getMessage());
        }*/
	}

	/**
	 * @return the current segmentation mode, one of {@link #MOST_WORDS_MODE} or
	 *         {@link #MAX_WORD_LENGTH_MODE}
	 */
	public String getMode() {
		return mode;
	}

	/**
	 * Solr {@code TokenizerFactory} implementation backed by the Paoding tokenizer.
	 * Honors an optional {@code mode} attribute from the factory configuration;
	 * when absent, the default most-words (finest-granularity) mode is used.
	 * <p>
	 * The original commented-out {@code setMode(args.get("mode"))} is now enabled
	 * guardedly — it was previously unusable because of the setMode recursion bug.
	 *
	 * @param args factory configuration attributes from the Solr schema
	 * @throws IllegalArgumentException if a {@code mode} attribute is present but invalid
	 */
	public PaodingTokenizerFactory(Map<String, String> args) {
		super(args);
		String configuredMode = args.get("mode");
		if (configuredMode != null) {
			setMode(configuredMode);
		}
	}

	/**
	 * Creates a {@link PaodingTokenizer} over {@code input}, using a collector
	 * chosen by the current {@link #mode}.
	 *
	 * @see org.apache.solr.analysis.TokenizerFactory#create(java.io.Reader)
	 */
	@Override
	public Tokenizer create(AttributeFactory factory, Reader input) {
		return new PaodingTokenizer(input, PaodingMaker.make(), createTokenCollector());
	}

	/**
	 * No external resources are needed; the Paoding dictionaries are resolved by
	 * {@link PaodingMaker} itself.
	 */
	@Override
	public void inform(ResourceLoader loader) throws IOException {
		// Intentionally empty — kept for the ResourceLoaderAware contract.
	}

	/**
	 * Maps the current mode string to its collector implementation.
	 * Falls back to most-words collection for any unexpected mode value,
	 * matching the default behavior.
	 */
	private TokenCollector createTokenCollector() {
		if (MOST_WORDS_MODE.equals(mode)) {
			return new MostWordsTokenCollector();
		} else if (MAX_WORD_LENGTH_MODE.equals(mode)) {
			return new MaxWordLengthTokenCollector();
		} else {
			return new MostWordsTokenCollector();
		}
	}

}
